# Initialize the package
import sys
# Root of the repository checkout; the package sources live underneath it.
repo = '/root/biosignal_processing_package'
# Make both the repository root and its inner package directory importable.
# Each path is inserted at index 1, so the one inserted last (the inner
# package directory) ends up ahead of the repository root in sys.path.
for _search_path in (repo, repo + '/biosignal_processing_package'):
    sys.path.insert(1, _search_path)
# Import the package functions
import pandas as pd
import numpy as np
# package functions
from preprocessing.signal_transform import signal_downsample, segment_signal
from preprocessing.signal_filter import filter_ppg
from plotting.signal_plots import plot_signal
from plotting.signal_plots import plot_filtered
from savvyppg import ppg_module
# Reading and importing the signals
# read files
# Signals we can produce output without obvious errors from:
_clean_recordings = [
    "0-VagalTone-1619468340-ppg.csv.gz",
    "0-VagalTone-1619541388-ppg.csv.gz",
    "4807805364203339305-RelaxGame-1619035664-ppg.csv.gz",
    "5118090038914056700-RelaxGame-1619042816-ppg.csv.gz",
]
# Datasets with significant movement noise:
_noisy_recordings = [
    "4835847629383064719-RelaxGame-1619041516-ppg.csv.gz",
    "5448292036127487645-Pizza-1619546545-ppg.csv.gz",
    "5359311199602188842-Potions-1619556392-ppg.csv.gz",
    "5359311199602188842-VagalTone-1619557200-ppg.csv.gz",
    "4650373652542159307-VagalTone-1619464304-ppg.csv.gz",
    "0-Potions-1622059739-ppg.csv.gz",
]
# Every recording file name in one list: the clean group first, then the noisy group.
files = _clean_recordings + _noisy_recordings
def _read_recording(file_name):
    """Read one gzip-compressed CSV from the repo's data directory.

    Column names are stripped of surrounding whitespace before the
    DataFrame is returned.
    """
    frame = pd.read_csv(repo + "/data/" + file_name, compression='gzip')
    # strip away spaces from col names
    frame.columns = [column.strip() for column in frame.columns]
    return frame


# One DataFrame per entry of `files`, in the same order.
signals = [_read_recording(file_name) for file_name in files]
# Inspect the structure of the data and estimate the sampling rate of the signal
# Look at a single recording (index 3 of `signals`) as an example.
signal = signals[3]
print(signal.shape)
# `display` is not imported in this file; presumably the IPython/Jupyter helper.
display(signal.head())

# Estimate: number of rows divided by the second-to-last `ppg_time` value.
# NOTE(review): this assumes ppg_time is elapsed time starting near zero and that
# the frame has the default 0..N-1 index (the lookup is label-based) -- confirm.
# It also divides N rows by the time at index N-2; verify that is intentional.
_row_count = len(signal)
_reference_time = signal.ppg_time[_row_count - 2]
sampling_rate = _row_count / _reference_time
print("sampling rate: " + str(sampling_rate))
# comparing available signals
# Element-wise mean of the three ppg columns of the selected recording.
average_signal = (signal.ppg0 + signal.ppg1 + signal.ppg2) / 3

# Label -> trace, in the order the traces are handed to plot_signal.
_traces = {
    "ppg0": signal.ppg0,
    "ppg1": signal.ppg1,
    "ppg2": signal.ppg2,
    "average": average_signal,
}
plot_signal(
    [trace.to_list() for trace in _traces.values()],
    # One 135 per trace; 135 is the value used as `sr` elsewhere in this script
    # (presumably the sampling rate in Hz -- confirm).
    [135] * len(_traces),
    labels=list(_traces),
    x_axis_label="Time (s)",
)
# Visualizing all the signals
# visualizing all sets of signals
from preprocessing.signal_transform import signal_downsample

# `files` and `signals` are parallel lists, so walk them together.
for file_name, signal in zip(files, signals):
    print(file_name)
    # downsample for plotting
    downsampled_channels = [
        signal_downsample(signal[channel].to_list(), 135, downsample_factor=3)
        for channel in ("ppg0", "ppg1", "ppg2")
    ]
    plot_signal(
        downsampled_channels,
        # The rate passed alongside each trace is divided by the same factor of 3.
        [135 / 3] * 3,
        labels=["ppg0", "ppg1", "ppg2"],
        x_axis_label="Time (s)",
    )
# Rate passed to the filtering and beat-detection calls in this script.
sr = 135

# make list of only ppg0 signals
ppg0_list = [recording.ppg0.to_list() for recording in signals]

# actual cleaning: run filter_ppg over every raw ppg0 trace
ppg0_clean = [filter_ppg(raw_trace, sr) for raw_trace in ppg0_list]
# comparing raw and clean signals, signals are downsampled for visualization
for file_name, raw_trace, clean_trace in zip(files, ppg0_list, ppg0_clean):
    print(file_name)
    raw_downsampled = signal_downsample(raw_trace, 135, downsample_factor=3)
    clean_downsampled = signal_downsample(clean_trace, 135, downsample_factor=3)
    # The rate handed to the plot is divided by the same factor of 3.
    plot_filtered(raw_downsampled, clean_downsampled, sr / 3)
# Preprocess and inspect beat level quality of each ppg signal
# Builds one ppg_module.ppgSignal per raw ppg0 trace, in the same order as `files`.
savvyppg_signals = []
for file_name, raw_trace in zip(files, ppg0_list):
    print("Processing " + file_name)
    savvyppg_signals.append(
        ppg_module.ppgSignal(raw_trace, sr, predict_beats=True)
    )
# Visualize the beat level signal quality
from plotting.ppg_plotting import ppg_plot_quality

for file_name, ppg in zip(files, savvyppg_signals):
    print(file_name)
    # Sum of the `pred_label` column divided by the signal's `beats_num`.
    # NOTE(review): presumably pred_label is 1 for a good beat and 0 otherwise -- confirm.
    good_beats = ppg.beats_df.pred_label.sum()
    sqi = good_beats / ppg.beats_num
    print("Proportion of good beats/total detected beats: ", sqi * 100)
    ppg_plot_quality(ppg.data_ppg, ppg.beats_df, sr)